** Session setup: drop the data in memory before the pipeline starts.
clear
** Turn off the --more-- pause so the do-file runs without interruption.
set more off
** NOTE(review): as written this targets a single macro named "all"; if the
** intent is to remove every macro, the keyword would be _all -- confirm.
macro drop all
** Close any log that is still open; capture ignores the error if none is.
capture log close


/********************************************************************************
Discrimination in Multi-Phase Systems: Evidence from Child Protection
Create Master Child*Case Level DHHS Data

Created on: 2/26/19

Last Modified on: 2/20/2024

Description: Create master DHHS dataset at the child*case level.

Note that we have removed the file directory names from this program for 
confidentiality reasons.
********************************************************************************/

** Directory globals (values intentionally blank: paths were removed for
** confidentiality). Dataset paths in this do-file are generally built as
** "${global}filename" with no separator in between, so each value should
** end with a directory separator once it is filled in.
global rawdata ""
global cleandata ""
global tmp ""

/********************************************************************************

For reference: the following identifiers appear in each DHHS Dataset:

1) Allegations: inv_caseid, src_caseid, ong_caseid
2) Case: intake_caseid, inv_caseid, src_caseid
3) Workers: caseid, src_caseid
4) Allegations Match: intake_id

The goal of this do-file is to create links that allow easy merging of the
various DHHS files.

*******************************************************************************/

** Step 1: load the allegations file and join the case file on
** child ID (vicid) and investigation case ID (inv_caseid).
use "${cleandata}alleg_child_case_level.dta", clear
sort vicid inv_caseid
** NOTE(review): gzmerge does not appear to be a built-in Stata command, and
** the trailing .gz sits outside the quoted path -- confirm the command is
** installed and that this is the intended file reference.
gzmerge 1:1 vicid inv_caseid using "${cleandata}case_clean.dta".gz
** Retain only observations matched in both files.
drop if _merge!=3
drop _merge

** Step 2: attach the child-level file (one row per vicid in the using data),
** again retaining matched observations only.
sort vicid
merge m:1 vicid using "${cleandata}child_level_clean.dta"
drop if _merge!=3
drop _merge

**Bring in the workers file using the various case identifiers  
** First attempt: match on child ID (vicid) and src_caseid.
sort vicid src_caseid
merge 1:1 vicid src_caseid using "${cleandata}workers_child_case_level.dta"

**Save a matches file and a nonmatches file.
** Matched rows are set aside here and appended back at the end of this section.
preserve
keep if _merge==3
drop _merge
save "${tmp}matches_worker.dta", replace
restore 

** Master-only rows (no worker record found on src_caseid) are saved separately
** so the fallback merges below can retry them with other identifiers.
preserve
keep if _merge==1
drop _merge
**drop all info from the workers file
drop worker* caseid
save "${tmp}nonmatches_worker.dta", replace
restore

**Use the nonmatches file to try to match using different case id variables
** NOTE(review): the result of this inv_caseid merge is never kept or saved.
** The next "use ..., clear" replaces it, so only the ong_caseid matches below
** reach the output. Confirm whether inv_caseid matches were meant to be retained.
use "${cleandata}workers_child_case_level.dta", clear
rename caseid inv_caseid
merge 1:1 vicid inv_caseid using "${tmp}nonmatches_worker.dta"

** Fallback on ong_caseid: reload the workers file and treat its caseid as the
** ongoing case ID.
use "${cleandata}workers_child_case_level.dta", clear
rename caseid ong_caseid
merge 1:m vicid ong_caseid using "${tmp}nonmatches_worker.dta"
/*Note that the child ID and the ongoing caseid do not uniquely identify obs in
the using data here BUT the ongoing ID should be tied to a child and since I'm
eventually only going to be keeping obs which are the FIRST investigation for 
each child, then merging 1:m instead of 1:1 really shouldn't skew anything for me.
*/
** Keep only rows matched on ong_caseid, then add back the src_caseid matches.
** Rows from the nonmatches file that did not match here are not carried forward.
keep if _merge==3
drop _merge
append using "${tmp}matches_worker.dta"

** Join the allegations match file on child ID and intake ID; rows that exist
** only in the match file are discarded.
sort vicid intake_id
merge 1:1 vicid intake_id using "${cleandata}alleg_match_child_case_level.dta"
drop if _merge==2

** fc: 1 when this row was matched in the allegations match file, else 0.
generate fc = (_merge==3)
label variable fc "Child Placed into Foster Care as a Result of Investigation"

** fc_ever: 1 when any row for the same child was matched.
by vicid, sort: egen max_merge = max(_merge)
generate fc_ever = (max_merge==3)
label variable fc_ever "Child was Ever Placed in Foster Care"
drop _merge

** Earliest removal date across all of the child's rows; both date variables
** are displayed as daily dates.
by vicid, sort: egen removal_date_first = min(removal_date)
label variable removal_date_first "Child's First Removal Date"
format removal_date removal_date_first %td

** Fill in a missing victim zipcode from the perpetrator zipcode when the
** latter is available. The compare command only prints a comparison report.
compare zipcode_vic zipcode_perp
replace zipcode_vic=zipcode_perp if zipcode_perp!=. & zipcode_vic==.
** Drop variables that are not needed from here on.
drop max_merge zipcode_perp child_role

**Bring in information from living arrangements file
** The path is built as "${cleandata}filename", matching every other dataset
** reference in this do-file. The previous "$cleandata/..." form added an
** extra separator and pointed at the filesystem root when the global is blank.
merge m:1 vicid using "${cleandata}living_arrangement_clean.dta"
** Show the match breakdown in the log before filtering.
tab _merge
** Discard rows that exist only in the living arrangements file; unmatched
** master rows are kept.
drop if _merge==2
drop _merge
drop fc_startdt placementid

**Label Master Child*Case Level Data

** Allegation-type variables
label variable domvi "Domestic Violence Allegation"
label variable drugres "Drug Residence Allegation"
label variable failprot "Failure to Protect Allegation"
label variable impsup "Improper Supervision Allegation"
label variable maltreat "Maltreatment Allegation"
label variable medneg "Medical Neglect Allegation"
label variable alleg_oth "Other Allegation"
label variable phyab "Physical Abuse Allegation"
label variable phyneg "Physical Neglect Allegation"
label variable sexab "Sexual Abuse Allegation"
label variable subab "Substance Abuse Allegation"
label variable threat_harm "Threatened Harm Allegation"

** Perpetrator-relationship variables
label variable mom "Perpetrator was Mother"
label variable dad "Perpetrator was Father"
label variable notrel "Perpetrator was Not Related"
label variable rel "Perpetrator was non-Parent Relative"
label variable parent_unkn "Perpetrator was Unknown Parent"

** Outcome variables
label variable preponderance "Allegation was Substantiated"
label variable removal_date "First Removal Date Associated with Investigation"

** Lookup locals: display text for each allegation stub ...
local txt_domvi "Domestic Violence"
local txt_drugres "Drug Residence"
local txt_failprot "Failure to Protect"
local txt_impsup "Improper Supervision"
local txt_maltreat "Maltreatment"
local txt_medneg "Medical Neglect"
local txt_alleg_oth "Other Allegation"
local txt_phyab "Physical Abuse"
local txt_phyneg "Physical Neglect"
local txt_sexab "Sexual Abuse"
local txt_subab "Substance Abuse"
local txt_threat_harm "Threatened Harm"

** ... and for each perpetrator stub.
local who_mom "Mother"
local who_dad "Father"
local who_notrel "Not Related"
local who_parent_unkn "Unknown Parent"
local who_rel "Relative"

** Label every <allegation>_<perpetrator>_prep variable by combining the two
** lookups; the nested macro reference resolves the stub to its display text.
foreach alleg in domvi drugres failprot impsup maltreat medneg alleg_oth ///
	phyab phyneg sexab subab threat_harm {
	foreach perp in mom dad notrel parent_unkn rel {
		label variable `alleg'_`perp'_prep ///
			"Preponderance of Allegation `txt_`alleg'' by `who_`perp''"
	}
}

** Move the key variables to the front, sort by child and investigation case,
** reduce storage types where possible, and write the master dataset.
order vicid inv_caseid complaint_date complaint_year female dob racecat, first
sort vicid inv_caseid
compress
save "${cleandata}master_dhhs.dta", replace

**Erase temp files
** Paths are quoted, as in every save/use/merge call in this do-file, so a
** ${tmp} directory containing spaces does not break the command.
erase "${tmp}nonmatches_worker.dta"
erase "${tmp}matches_worker.dta"








